
///////////////Set-up file for "How Much Should We Trust the Dictator's GDP Growth Estimates?" by Luis R. Martinez. last edited: 2022/02/11

/*

///////////////////////NOTES//////////////////////////////////

//Description: 

This do-file combines all the raw data and produces the final datasets used in the analysis.

// Instructions:

*1. Fix global "dir" in line 40 to the destination folder
*2. Remove star (*) from package installation as needed in lines 42-52.
*3. Destination folder must include following folders:
	"Data" w/ following subfolders:
	-"raw" which includes all original raw data from different sources in various sub-folders (included in replication package)
	-"temp" which includes temporary datasets produced by this do-file
	-"master" which includes the final datasets used in the analysis
	
//	Computational Environment:

Operating System: Windows 10
Stata version: Stata/MP 17.0 for Windows (64 bit)
Estimated running time: 10 minutes

//	Content of Do-file:

*Sections 1-15: data merging
*Sections 16-19: data cleaning and processing
*Sections 20-22: creation of auxiliary datasets

///////////////////////////////////////////////////////////////

*/

clear all
set more off
global dir "XXX"	

*ssc install kountry,replace
*ssc install splitvallabels,replace
*ssc install reghdfe,replace
*ssc install xtabond2,replace
*ssc install winsor2,replace
*ssc install spmap,replace
*ssc install geo2xy
*ssc install outreg2
*ssc install wbopendata
*ssc install labutil
*net install http://fmwww.bc.edu/RePEc/bocode/i/ineqdec0

timer clear 1
timer on 1

///////////// 1. import data from World Bank

//A: Access to electricity from WB server (incomplete before) + creates panel
*wbopendata, language(en - English) indicator(EG.ELC.ACCS.ZS;EG.ELC.ACCS.UR.ZS;EG.ELC.ACCS.RU.ZS) long clear // last accessed 06/25/2021

use "${dir}\Data\raw\WB World Development Indicators\WDI25 Jun 2021.dta" 

//B: Entire WDI 2014-11

*I downloaded the November 2014 vintage of the WDI from https://datatopics.worldbank.org/world-development-indicators/wdi-archives.html (last accessed 05/25/2021)

preserve
	clear
	import excel "${dir}\Data\raw\WB World Development Indicators\WDI_2014_11.xlsx", sheet("Data") firstrow
	sort CountryCode IndicatorCode
	keep if  IndicatorCode=="NY.GDP.MKTP.KN"|IndicatorCode=="NY.GDP.PCAP.KN"|IndicatorCode=="NY.GNP.PCAP.CD"|IndicatorCode=="SP.POP.TOTL"|IndicatorCode=="AG.LND.TOTL.K2"|IndicatorCode=="EG.USE.ELEC.KH"|IndicatorCode=="NY.GDP.MKTP.KD.ZG"|IndicatorCode=="NV.AGR.TOTL.ZS"|IndicatorCode=="AG.LND.AGRI.ZS"|IndicatorCode=="NY.GDP.TOTL.RT.ZS"|IndicatorCode=="NY.GDP.PETR.RT.ZS"|IndicatorCode=="NV.IND.TOTL.ZS"|IndicatorCode=="NV.IND.MANF.ZS"|IndicatorCode=="NV.SRV.TETC.ZS"|IndicatorCode=="SP.URB.TOTL.IN.ZS"|IndicatorCode=="NY.GDP.PCAP.KD"|IndicatorCode=="SE.PRM.NENR"|IndicatorCode=="SP.DYN.LE00.IN"|IndicatorCode=="SP.DYN.IMRT.IN"|IndicatorCode=="NE.CON.PETC.ZS"|IndicatorCode=="NE.GDI.TOTL.ZS"|IndicatorCode=="NE.CON.GOVT.ZS"|IndicatorCode=="NE.EXP.GNFS.ZS"|IndicatorCode=="NE.IMP.GNFS.ZS"
	
		*GDP (constant LCU)	NY.GDP.MKTP.KN
		*GDP per capita (constant LCU)	NY.GDP.PCAP.KN
		*GNI per capita, Atlas method (current US$)
		*Population, total	SP.POP.TOTL
		*Land area (sq. km)	AG.LND.TOTL.K2
		*Electric power consumption (kWh)	EG.USE.ELEC.KH
		*GDP growth (annual %)	NY.GDP.MKTP.KD.ZG
		*Agriculture, value added (% of GDP)	NV.AGR.TOTL.ZS
		*Agricultural land (% of land area)	AG.LND.AGRI.ZS
		*Total natural resources rents (% of GDP)	NY.GDP.TOTL.RT.ZS
		*Oil rents (% of GDP)	NY.GDP.PETR.RT.ZS
		*Industry, value added (% of GDP)	NV.IND.TOTL.ZS
		*Manufacturing, value added (% of GDP)	NV.IND.MANF.ZS
		*Services, etc., value added (% of GDP)	NV.SRV.TETC.ZS
		*Urban population (% of total)	SP.URB.TOTL.IN.ZS
		*GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD
		*School enrollment, primary (% net)	SE.PRM.NENR
		*Life expectancy at birth, total (years)	SP.DYN.LE00.IN
		*Mortality rate, infant (per 1,000 live births)	SP.DYN.IMRT.IN
		*Household final consumption expenditure, etc. (% of GDP)	NE.CON.PETC.ZS
		*Gross capital formation (% of GDP)	NE.GDI.TOTL.ZS
		*General government final consumption expenditure (% of GDP)	NE.CON.GOVT.ZS
		*Exports of goods and services (% of GDP)	NE.EXP.GNFS.ZS
		*Imports of goods and services (% of GDP)	NE.IMP.GNFS.ZS
	
	foreach v of varlist E-BG {
		local x : variable label `v'
		rename `v' yr`x'
	}
	reshape long yr, i(CountryCode IndicatorCode IndicatorName) j(year)
	gen series=subinstr(IndicatorCode,".","_",.)
	drop Indicator*
	reshape wide yr,i(CountryCode year) j(series) string
	foreach x in AG_LND_AGRI_ZS AG_LND_TOTL_K2 EG_USE_ELEC_KH NV_AGR_TOTL_ZS NV_IND_MANF_ZS NV_IND_TOTL_ZS NY_GDP_MKTP_KD_ZG NY_GDP_MKTP_KN NY_GDP_PCAP_KD NY_GDP_PCAP_KN NY_GNP_PCAP_CD NY_GDP_PETR_RT_ZS NY_GDP_TOTL_RT_ZS SE_PRM_NENR SP_DYN_IMRT_IN SP_DYN_LE00_IN SP_POP_TOTL SP_URB_TOTL_IN_ZS NE_CON_PETC_ZS NE_GDI_TOTL_ZS NE_CON_GOVT_ZS NE_EXP_GNFS_ZS NE_IMP_GNFS_ZS NV_SRV_TETC_ZS{
	rename yr`x' `x'_2014_11
	}
	rename *,lower
	replace countrycode="AND" if countrycode=="ADO" //Andorra
	replace countrycode="COD" if countrycode=="ZAR" //DRC
	replace countrycode="IMN" if countrycode=="IMY" //Isle of Man
	replace countrycode="ROU" if countrycode=="ROM" //Romania
	replace countrycode="TLS" if countrycode=="TMP" //East Timor
	replace countrycode="XKX" if countrycode=="KSV" //Kosovo
	replace countrycode="PSE" if countrycode=="WBG" //Palestine
	
	tempfile wdi14_merge
	save `wdi14_merge'
restore
merge 1:1 countrycode year using `wdi14_merge'	
drop if _merge==2	//Aggregates
drop _merge

//C. GDP data from previous vintages

*Downloaded from https://databank.worldbank.org/source/world-development-indicators (last accessed on 05/11/2021)

preserve
	clear
	import excel "${dir}\Data\raw\WB World Development Indicators\Data_Extract_From_WDI_Database_Archives_(beta).xlsx", sheet("Data") firstrow
	sort CountryCode SeriesCode VersionCode
	drop in 1/5
	generate vdate = string(VersionName, "%td")
	replace vdate=substr(vdate,3,7)
	egen vintage=concat( SeriesCode vdate)
	keep CountryCode vintage YR*
	reshape long YR, i(CountryCode vintage ) j(year)
	gen series=subinstr(vintage,".","_",.)
	drop vintage
	replace series=subinstr(series,"_KN","_KN_",.)
	reshape wide YR,i(CountryCode year) j(series) string
	forvalues x=2014/2021{
		foreach y in jan feb mar apr may jun jul aug sep oct nov dec{
			if "`y'"=="jan" local z "01"
			if "`y'"=="feb" local z "02"
			if "`y'"=="mar" local z "03"
			if "`y'"=="apr" local z "04"
			if "`y'"=="may" local z "05"
			if "`y'"=="jun" local z "06"
			if "`y'"=="jul" local z "07"
			if "`y'"=="aug" local z "08"
			if "`y'"=="sep" local z "09"
			if "`y'"=="oct" local z "10"
			if "`y'"=="nov" local z "11"
			if "`y'"=="dec" local z "12"
			cap rename YRNY_GDP_MKTP_KN_`y'`x' ny_gdp_mktp_kn_v`x'_`z'
			cap label var ny_gdp_mktp_kn_v`x'_`z' "GDP in LCU, WDI `y' `x' vintage"
		}
	}	
	foreach x of varlist ny_gdp_*{
		replace `x'="" if `x'==".."
	}
	destring ny_gdp*,replace
	rename CountryCode countrycode
	order countrycode year
	order ny_gdp*, after(year) alpha
	drop if year==2021
	tempfile wdi_merge
	save `wdi_merge'
restore
merge 1:1 countrycode year using `wdi_merge'	
drop if _merge==2	//Aggregates
drop _merge

//D. data on GNI per capita from previous vintages

preserve
	clear
	import excel "${dir}\Data\raw\WB World Development Indicators\WDI_GNI_vintages.xlsx", sheet("Data") firstrow
	sort CountryCode SeriesCode VersionCode
	drop in 1/5
	gen vyear=yofd(VersionName )
	tostring vyear,replace
	egen vintage=concat( SeriesCode vyear )
	keep CountryCode vintage YR*
	reshape long YR, i(CountryCode vintage ) j(year)
	gen series=subinstr(vintage,".","_",.)
	drop vintage
	reshape wide YR,i(CountryCode year) j(series) string
	forvalues x=1989/2020{
		cap rename YRNY_GNP_PCAP_CD`x' ny_gnp_pcap_cd_v`x'
		cap label var ny_gnp_pcap_cd_v`x' "GNI per capita, Atlas method (current US$), WDI v`x'"
	}
	foreach x of varlist ny_gnp_pcap_cd_v1989-ny_gnp_pcap_cd_v2020{
		replace `x'="" if `x'==".."		
	}
	destring ny_gnp_pcap_cd_v1989-ny_gnp_pcap_cd_v2020,replace
	rename CountryCode countrycode
	drop ny_gnp_pcap_cd_v1994 ny_gnp_pcap_cd_v1995 //all missings
	drop if year==2021
	tempfile gni_merge
	save `gni_merge'
restore
merge 1:1 countrycode year using `gni_merge'	
drop if _merge==2	//Aggregates
drop _merge

//E. data on ODA/GNI (missing for several countries in 2014/11)

preserve
	clear
	import excel "${dir}\Data\raw\WB World Development Indicators\WDI_ODA.xlsx", sheet("Data") firstrow
	sort CountryCode SeriesCode VersionCode
	drop in 1/5
	keep CountryCode YR*
	reshape long YR, i(CountryCode) j(year)
	rename YR dt_oda_odat_gn_zs
	label var dt_oda_odat_gn_zs "Net ODA received (% of GNI), WDI Apr15"
	replace dt_oda_odat_gn_zs="" if dt_oda_odat_gn_zs==".."		
	destring dt_oda_odat_gn_zs,replace
	rename CountryCode countrycode
	drop if year==2021
	tempfile oda_merge
	save `oda_merge'
restore
merge 1:1 countrycode year using `oda_merge'	
drop if _merge==2	//Aggregates
drop _merge

//F. data on informality downloaded from https://www.enterprisesurveys.org/en/data/exploretopics/informality (last accessed on 05/12/2021)

preserve
	clear
	import excel "${dir}\Data\raw\WB Enterprise Surveys\ExportedResults.xlsx", sheet("Sheet1") firstrow
	kountry Economy, from(other) stuck
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	drop _ISO3N_1
	rename _ISO3C_ countrycode
	replace country=trim(country)
	replace countrycode="CPV" if Economy=="Cabo Verde"
	replace countrycode="CIV" if Economy=="Côte d'Ivoire"
	replace countrycode="SWZ" if Economy=="Eswatini"
	replace countrycode="XKX" if Economy=="Kosovo"
	replace countrycode="MKD" if Economy=="North Macedonia"
	drop if countrycode==""
	rename Percentoffirmscompetingagain constraint_informal
	rename Percentoffirmsformallyregist start_formal 
	rename Numberofyearsfirmoperatedwi years_informal 
	rename Percentoffirmsidentifyingpra competition_informal
	foreach x of varlist constraint_informal start_formal years_informal competition_informal{
		replace `x'="" if `x'=="n.a."
		destring `x',replace
	}
	drop Economy
	tempfile informal_merge
	save `informal_merge'
restore
merge m:1 countrycode using `informal_merge'	
drop _merge

//G. data on statistical capacity downloaded from https://databank.worldbank.org/source/statistical-capacity-indicators# (last accessed on 05/13/2021)

preserve
	clear
	import excel "${dir}\Data\raw\WB World Development Indicators\Data_Extract_From_Statistical_Capacity_Indicators.xlsx", sheet("Data") firstrow
	sort CountryCode SeriesCode 
	drop in 1/5
	gen series=substr(SeriesCode,12,.) if SeriesCode!="IQ.SCI.MTHD"&SeriesCode!="IQ.SCI.OVRL"&SeriesCode!="IQ.SCI.PRDC"&SeriesCode!="IQ.SCI.SRCE"
	replace series="iq_sci_mthd" if SeriesCode=="IQ.SCI.MTHD"
	replace series="iq_sci_ovrl" if SeriesCode=="IQ.SCI.OVRL"
	replace series="iq_sci_prdc" if SeriesCode=="IQ.SCI.PRDC"
	replace series="iq_sci_srce" if SeriesCode=="IQ.SCI.SRCE"	
	keep CountryCode series YR*
	reshape long YR, i(CountryCode series) j(year)
	reshape wide YR,i(CountryCode year) j(series) string
	foreach x in agcen excncpt exdebt fscov gdp gender hiv hlthsurv immun impexp indust iq_sci_mthd iq_sci_ovrl iq_sci_prdc iq_sci_srce malnut matern mortal nabase popcen popreg poverty povsurv prcpbase primcomp sdds unesco water who{
		rename YR`x' sci_`x'
		replace sci_`x'="" if sci_`x'==".."
		destring sci_`x',replace
	}
	rename CountryCode countrycode
	collapse sci*,by(countrycode)
	tempfile sci_merge
	save `sci_merge'
restore
merge m:1 countrycode using `sci_merge'	
drop if _merge==2 //aggregates
drop _merge

/////////////// 2. combine with data from Freedom House

//A. Freedom in the World Index

*I download raw data from https://freedomhouse.org/sites/default/files/2020-02/2020_Country_and_Territory_Ratings_and_Statuses_FIW1973-2020.xlsx (last accessed on 12/03/2020)
*Relevant information is in the "Country Ratings, Statuses" sheet, but headers complicate importing into Stata, so I have fixed by hand and saved as FIW.csv
*Periodicity is a bit irregular in the 1980s. e.g., data is missing for 1982 (i.e. divided between 1981 and 1983)

preserve
	clear
	import delimited "$dir\Data\raw\Freedom House\FIW.csv", varnames(1) encoding(ISO-8859-2) 

	*Wide to long
	reshape long pr_ cl_ status_, i(cntry_name) j(year)
	
	*ISO3C code
	kountry cntry_name, from(other) stuck
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	drop _ISO3N_1
	rename _ISO3C_ cntcode3
	
	*Manually input empty missing codes
	*For countries with merges (e.g. East and West Germany), I am averaging across the units merged
	replace cntcode3 = "CPV" if cntry_name == "Cabo Verde"
	replace cntcode3 = "CZE" if cntry_name == "Czechoslovakia"
	replace cntcode3 = "SWZ" if cntry_name == "Eswatini"
	replace cntcode3 = "DEU" if cntry_name == "Germany, E. "
	replace cntcode3 = "DEU" if cntry_name == "Germany, W. "
	replace cntcode3 = "XKX" if cntry_name == "Kosovo"
	replace cntcode3 = "MNE" if cntry_name == "Montenegro"
	replace cntcode3 = "MKD" if cntry_name == "North Macedonia"
	replace cntcode3 = "MNE" if cntry_name == "Montenegro"
	replace cntcode3 = "SSD" if cntry_name == "South Sudan"
	replace cntcode3 = "TWN" if cntry_name == "Taiwan"
	replace cntcode3 = "VNM" if cntry_name == "Vietnam, N."
	replace cntcode3 = "VNM" if cntry_name == "Vietnam, S."	
	replace cntcode3 = "YEM" if cntry_name == "Yemen, N."
	replace cntcode3 = "YEM" if cntry_name == "Yemen, S."
	
	*Rename variables
	rename cntcode3 countrycode
	rename cntry_name countryname
	rename pr_ fiw_pr
	rename cl_ fiw_cl
	
	*Replace values
	replace fiw_pr = "2" if fiw_pr == "2(5)"
	replace fiw_pr = "" if fiw_pr == "-"
	replace fiw_cl = "3" if fiw_cl == "3(6)"
	replace fiw_cl = "" if fiw_cl == "-"
	destring fiw_pr fiw_cl, replace
	
	collapse (mean) fiw_pr fiw_cl, by(countrycode year)

	*create tempfile  for merge
	tempfile fiw_merge
	save `fiw_merge'
restore

merge 1:1 countrycode year using `fiw_merge'	
/*Details on merge:
(i) Data from Freedom House is missing for 23 countries in WB between 1972 and 2019 (_merge==1): American Samoa;Aruba;Bermuda;British Virgin Islands; Cayman Islands; Channel Islands; Curacao; Faroe Islands; French Polynesia; Gibraltar; Greenland; Guam; Hong Kong SAR, China; Isle of Man; Macao SAR, China; New Caledonia; Northern Mariana Islands; Puerto Rico; Sint Maarten (Dutch part); St Martin (French part); Turks and Caicos Islands; Virgin Islands (US); West Bank and Gaza
(ii)Freedom House also has no data for 1960-1971 or for 2020
(iii)Data from World Bank is just missing for Taiwan for all years
*/
drop _merge

//B. Electoral Democracy dummy

*I download raw data in June 2017
*Freedom House no longer produces this measure: "Prior to the 2021 edition, Freedom in the World assigned the designation “electoral democracy” to countries that had met certain minimum standards for political rights and civil liberties; territories were not included in the list of electoral democracies. An electoral democracy designation required a score of 7 or better in the Electoral Process subcategory, an overall political rights score of 20 or better, and an overall civil liberties score of 30 or better. In order to simplify the report's methodological outputs, Freedom House is no longer highlighting this designation, but the underlying scores remain publicly available." https://freedomhouse.org/reports/freedom-world/faq-freedom-world#ElectoralDemocracy (last accessed on 2021/05/07)
*headers complicate importing into Stata, so I have fixed by hand and saved as FIW_electoral_democracy.csv
	
preserve
	clear
	import delimited "$dir\Data\raw\Freedom House\FIW_electoral_democracy.csv", varnames(1) encoding(ISO-8859-2) 

	*Wide to long
	reshape long democracy, i(country) j(year)
	
	*create autocracy measure
	replace democracy=trim(democracy)
	gen autocracyFH=democracy=="No" if democracy!="-" 
	drop democracy
	
	*ISO3C code
	kountry country, from(other) stuck
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	drop _ISO3N_1
	rename _ISO3C_ cntcode3
	
	*Manually input empty missing codes
	*For countries with merges (e.g. East and West Germany), I am averaging across the units merged
	replace cntcode3 = "KNA" if country == "St. Kitts & Nevis"
	replace cntcode3 = "CZE" if country == "Czechoslovakia"
	replace cntcode3 = "DEU" if country == "Germany E. "
	replace cntcode3 = "DEU" if country == "Germany W. "
	replace cntcode3 = "XKX" if country == "Kosovo"
	replace cntcode3 = "VNM" if country == "Vietnam N."
	replace cntcode3 = "VNM" if country == "Vietnam S."	
	replace cntcode3 = "YEM" if country == "Yemen N."
	replace cntcode3 = "YEM" if country == "Yemen S."
	
	*Rename variables
	rename cntcode3 countrycode
	rename country countryname
		
	collapse (mean) autocracyFH, by(countrycode year)

	*create tempfile  for merge
	tempfile fh_merge
	save `fh_merge'
restore

merge 1:1 countrycode year using `fh_merge'	
/*Details on merge:
(i) Freedom House - Electoral Democracy data is missing for 23 countries in WB between 1989 and 2015 (_merge==1): American Samoa;Aruba;Bermuda;British Virgin Islands; Cayman Islands; Channel Islands; Curacao; Faroe Islands; French Polynesia; Gibraltar; Greenland; Guam; Hong Kong SAR, China; Isle of Man; Macao SAR, China; New Caledonia; Northern Mariana Islands; Puerto Rico; Sint Maarten (Dutch part); St Martin (French part); Turks and Caicos Islands; Virgin Islands (US); West Bank and Gaza
(ii)Freedom House-Elecotral Democracy also has no data before 1989 or after 2015
*/
drop _merge
	
/////////////// 3. combine with data on nighttime lights

//A. Own calculations based on Li, Zhou, Zhao and Zhao (Nature 2020)

preserve
	
	clear
	import delimited "${dir}\Data\raw\Nighttime Lights\Li et al Nature 2020\Harmo_zonalstat_1992_2018.csv"
	cap noisily rename _mean y1992_mean
	drop objectid
	
	// rename variables for reshape
	forval i = 1992/2018{
	    rename y`i'_mean Y`i'
	}
	
	// reshape
	reshape long Y, i(cntry_name) j(year)
	rename Y mean_lights_weighted
	
	//	ISO 3 letter code
	kountry cntry_name, from(other) stuck
	cap noisily drop NAMES_STD
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	drop _ISO3N_1
	rename _ISO3C_ iso3v10
	
	//	Filled empty codes
	replace iso3v10 = "ATA" if cntry_name == "Antarctica"
	replace iso3v10 = "BLR" if cntry_name == "Byelarus"
	replace iso3v10 = "BVT" if cntry_name == "Bouvet Island"
	replace iso3v10 = "MNP" if cntry_name == "Northern Mariana Islands"
	replace iso3v10 = "FLK" if cntry_name == "Falkland Islands (Islas Malvinas)"
	replace iso3v10 = "ATF" if cntry_name == "French Southern & Antarctic Lands"
	replace iso3v10 = "GGY" if cntry_name == "Guernsey"
	replace iso3v10 = "HMD" if cntry_name == "Heard Island & McDonald Islands"
	replace iso3v10 = "IOT" if cntry_name == "British Indian Ocean Territory"
	replace iso3v10 = "JEY" if cntry_name == "Jersey"
	replace iso3v10 = "MNE" if cntry_name == "Montenegro"
	replace iso3v10 = "NFK" if cntry_name == "Norfolk Island"
	replace iso3v10 = "PCN" if cntry_name == "Pitcairn Islands"
	replace iso3v10 = "SPM" if cntry_name == "St. Pierre and Miquelon"
	replace iso3v10 = "SGS" if cntry_name == "South Georgia and the South Sandwich Is"
	replace iso3v10 = "TKL" if cntry_name == "Tokelau"
	replace iso3v10 = "TWN" if cntry_name == "Taiwan"
	replace iso3v10 = "VIR" if cntry_name == "Virgin Islands"
	replace iso3v10 = "IMN" if cntry_name == "Man, Isle of"
	replace iso3v10 = "PLW" if cntry_name == "Pacific Islands (Palau)"
	replace iso3v10 = "WAK" if cntry_name == "Wake Island"
	replace iso3v10 = "PSE" if cntry_name == "West Bank"
	
	//	Drop empty cells
	drop if iso3v10 == ""	
	drop cntry_name 
	rename mean_lights_weighted avdn
	order iso3v10 year
	
	tempfile li_merge
	save `li_merge'
	
restore

gen iso3v10=countrycode
label var iso3v10 "Equivalent to countrycode"
merge 1:1 iso3 year using `li_merge'	
drop if _merge==2	//30 small islands and posessions that are not in the World Bank dataset	
drop _merge
rename avdn avdn18

//B. DMSP-OLS: own calculations

preserve

	clear	
	// global sats - year
	global sat_years 101992 101993 101994 121994 121995 121996 121997 ///
	121998 121999 141997 141998 141999 142000 142001 142002 ///
	142003 152000 152001 152002 152003 152004 152005 152006 ///
	152007 162004 162005 162006 162007 162008 162009 182010 ///
	182011 182012 182013
	
	// attributes sat-year
	foreach i in $sat_years{
		clear 
		import delimited "${dir}\Data\raw\Nighttime Lights\DMSP-OLS\Extensionzonalstat_`i'.csv"					
		gen sat_year = `i'
		tempfile append_`i'
		save `append_`i'', replace
	}	
	clear
	foreach i in $sat_years{
		append using `append_`i'', force
	}	
		
	//ISO 3 letter code
	kountry cntry_name, from(other) stuck
	cap noisily drop NAMES_STD
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	drop _ISO3N_1
	rename _ISO3C_ iso3v10
	
	//Filled empty codes
	replace iso3v10 = "ATA" if cntry_name == "Antarctica"
	replace iso3v10 = "BLR" if cntry_name == "Byelarus"
	replace iso3v10 = "BVT" if cntry_name == "Bouvet Island"
	replace iso3v10 = "MNP" if cntry_name == "Northern Mariana Islands"
	replace iso3v10 = "FLK" if cntry_name == "Falkland Islands (Islas Malvinas)"
	replace iso3v10 = "ATF" if cntry_name == "French Southern & Antarctic Lands"
	replace iso3v10 = "GGY" if cntry_name == "Guernsey"
	replace iso3v10 = "HMD" if cntry_name == "Heard Island & McDonald Islands"
	replace iso3v10 = "IOT" if cntry_name == "British Indian Ocean Territory"
	replace iso3v10 = "JEY" if cntry_name == "Jersey"
	replace iso3v10 = "MNE" if cntry_name == "Montenegro"
	replace iso3v10 = "NFK" if cntry_name == "Norfolk Island"
	replace iso3v10 = "PCN" if cntry_name == "Pitcairn Islands"
	replace iso3v10 = "SPM" if cntry_name == "St. Pierre and Miquelon"
	replace iso3v10 = "SGS" if cntry_name == "South Georgia and the South Sandwich Is"
	replace iso3v10 = "TKL" if cntry_name == "Tokelau"
	replace iso3v10 = "TWN" if cntry_name == "Taiwan"
	replace iso3v10 = "VIR" if cntry_name == "Virgin Islands"
	
	//Drop 
	drop v1
	drop if iso3v10 == ""
	
	//To year - country level
	tostring sat_year, g(str_sat_year)
	gen year = substr(str_sat_year,-4,4)
	destring year, replace
	
	//Collapse data - average year with more than one sat
	collapse (mean) mean_lights_weighted (sum) mean_cvg_weighted, by(iso3v10 year)

	//Land area (square km) with missing lights data south of the Arctic Circle - drop less than 5 percent
	replace mean_lights_weighted = . if mean_cvg_weighted > 0.05
	
	//Prepare data
	rename mean_lights_weighted avdn
	rename mean_cvg_weighted pmaac 
	label var avdn 		"area-weighted average lights digital number (DN) per cell"
	label var pmaac 	"proportion of land area (square km) with missing lights data south of the Arctic Circle"
	order year iso3v10 avdn
	sort iso3v10 year		

	tempfile dmsp_merge
	save `dmsp_merge'
	
restore

merge 1:1 iso3 year using `dmsp_merge'	
*_merge==1 for 11 territories: Channel Islands, Curacao, Hong Kong, Isle of Man, Kosovo, Palau, St Martin x 2, South Sudan, Timor-Leste, West Bank and Gaza 
*_merge==2 for 28 small territories not in WB dataset
drop if _merge==2 
drop _merge pmaac
rename avdn avdn13

//top-coded and unlit cells

preserve
	
	clear	
	// global sats - year
	global sat_years 101992 101993 101994 121994 121995 121996 121997 ///
	121998 121999 141997 141998 141999 142000 142001 142002 ///
	142003 152000 152001 152002 152003 152004 152005 152006 ///
	152007 162004 162005 162006 162007 162008 162009 182010 ///
	182011 182012 182013

	// attributes sat-year	
	foreach i in $sat_years{
		clear 
		import delimited "${dir}\Data\raw\Nighttime Lights\DMSP-OLS\CountTopUnlit_`i'.csv"						
		gen sat_year = `i'
		tempfile append_`i'
		save `append_`i'', replace
	}	
	clear
	foreach i in $sat_years{
			append using `append_`i'', force
	}	

	//ISO 3 letter code
	kountry cntry_name, from(other) stuck
	cap noisily drop NAMES_STD
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	drop _ISO3N_1
	rename _ISO3C_ iso3v10
	
	//Filled empty codes
	replace iso3v10 = "ATA" if cntry_name == "Antarctica"
	replace iso3v10 = "BLR" if cntry_name == "Byelarus"
	replace iso3v10 = "BVT" if cntry_name == "Bouvet Island"
	replace iso3v10 = "MNP" if cntry_name == "Northern Mariana Islands"
	replace iso3v10 = "FLK" if cntry_name == "Falkland Islands (Islas Malvinas)"
	replace iso3v10 = "ATF" if cntry_name == "French Southern & Antarctic Lands"
	replace iso3v10 = "GGY" if cntry_name == "Guernsey"
	replace iso3v10 = "HMD" if cntry_name == "Heard Island & McDonald Islands"
	replace iso3v10 = "IOT" if cntry_name == "British Indian Ocean Territory"
	replace iso3v10 = "JEY" if cntry_name == "Jersey"
	replace iso3v10 = "MNE" if cntry_name == "Montenegro"
	replace iso3v10 = "NFK" if cntry_name == "Norfolk Island"
	replace iso3v10 = "PCN" if cntry_name == "Pitcairn Islands"
	replace iso3v10 = "SPM" if cntry_name == "St. Pierre and Miquelon"
	replace iso3v10 = "SGS" if cntry_name == "South Georgia and the South Sandwich Is"
	replace iso3v10 = "TKL" if cntry_name == "Tokelau"
	replace iso3v10 = "TWN" if cntry_name == "Taiwan"
	replace iso3v10 = "VIR" if cntry_name == "Virgin Islands"
	
	//Drop 
	drop v1
	drop if iso3v10 == ""
	
	//To year - country level
	tostring sat_year, g(str_sat_year)
	gen year = substr(str_sat_year,-4,4)
	destring year, replace

	//Collapse data - average year with more than one sat
	collapse (mean) count_lights count_unlit count_topcoded, by(iso3v10 year)
	
	//Prepare data
	label var count_lights 		"Total number of pixels - south of the Arctic Circle"
	label var count_unlit 		"Number of pixels unlit (DN - 0) - south of the Arctic Circle"
	label var count_topcoded 	"Number of pixels topcoded (DN - 63) - south of the Arctic Circle"

	order year iso3v10
	sort iso3v10 year	

	tempfile count_merge
	save `count_merge'

restore

merge 1:1 iso3 year using `count_merge'	
*_merge==1 for 11 territories: Channel Islands, Curacao, Hong Kong, Isle of Man, Kosovo, Palau, St Martin x 2, South Sudan, Timor-Leste, West Bank and Gaza 
*_merge==2 for 28 small territories not in WB dataset
drop if _merge==2 
drop _merge


// Gini index of NTL

preserve

	clear
	// global sats - year
	global sat_years 101992 101993 101994 121994 121995 121996 121997 ///
	121998 121999 141997 141998 141999 142000 142001 142002 ///
	142003 152000 152001 152002 152003 152004 152005 152006 ///
	152007 162004 162005 162006 162007 162008 162009 182010 ///
	182011 182012 182013
	
	// attributes sat-year
	foreach i in $sat_years{
			clear 
			import delimited "${dir}\Data\raw\Nighttime Lights\DMSP-OLS\histo_F`i'_lights.csv"
			gen sat_year = `i'
			tempfile append_`i'
			save `append_`i'', replace
	}	
	clear
	foreach i in $sat_years{
		append using `append_`i'', force
	}
	// reshape data
	reshape long histo_, i(objectid sat_year) j(dn)
	reshape wide histo_, i(objectid dn) j(sat_year)
	
	// rename variables
	rename histo_* CTG*
	
	// clean 
	egen counttot = rowtotal( CTG101992-CTG182013)
	drop if counttot==0
	levels objectid, local(groups)
	order CTG* objectid dn counttot
	
	// gini per sat-year
	foreach h in $sat_years{
	gen lgini`h' = . 
		qui foreach g of local groups { 
			ineqdec0 dn [fw=CTG`h'] if objectid == `g'
			replace lgini`h' = $S_gini if objectid == `g' 
		}
	}
	
	drop if dn==0
	collapse CTG* lgini*, by(objectid cntry_name)
	* if there is no light, gini = undefined, not zero:
	foreach h in $sat_years {
		replace lgini`h' = . if CTG`h'==0
	}

	keep objectid cntry_name lgini*
	
	// reshape again to long
	reshape long lgini, i(objectid cntry_name) j(sat_year)
	
	//ISO 3 letter code
	kountry cntry_name, from(other) stuck
	cap noisily drop NAMES_STD
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	drop _ISO3N_1
	rename _ISO3C_ iso3v10
	
	//Filled empty codes
	replace iso3v10 = "ATA" if cntry_name == "Antarctica"
	replace iso3v10 = "BLR" if cntry_name == "Byelarus"
	replace iso3v10 = "BVT" if cntry_name == "Bouvet Island"
	replace iso3v10 = "MNP" if cntry_name == "Northern Mariana Islands"
	replace iso3v10 = "FLK" if cntry_name == "Falkland Islands (Islas Malvinas)"
	replace iso3v10 = "ATF" if cntry_name == "French Southern & Antarctic Lands"
	replace iso3v10 = "GGY" if cntry_name == "Guernsey"
	replace iso3v10 = "HMD" if cntry_name == "Heard Island & McDonald Islands"
	replace iso3v10 = "IOT" if cntry_name == "British Indian Ocean Territory"
	replace iso3v10 = "JEY" if cntry_name == "Jersey"
	replace iso3v10 = "MNE" if cntry_name == "Montenegro"
	replace iso3v10 = "NFK" if cntry_name == "Norfolk Island"
	replace iso3v10 = "PCN" if cntry_name == "Pitcairn Islands"
	replace iso3v10 = "SPM" if cntry_name == "St. Pierre and Miquelon"
	replace iso3v10 = "SGS" if cntry_name == "South Georgia and the South Sandwich Is"
	replace iso3v10 = "TKL" if cntry_name == "Tokelau"
	replace iso3v10 = "TWN" if cntry_name == "Taiwan"
	replace iso3v10 = "VIR" if cntry_name == "Virgin Islands"
	
	//Drop 
	drop objectid
	drop if iso3v10 == ""
	
	//To year - country level
	tostring sat_year, g(str_sat_year)
	gen year = substr(str_sat_year,-4,4)
	destring year, replace
	
	//Collapse data - average year with more than one sat
	collapse (mean) lgini, by(iso3v10 year)
	
	// Prepare data
	label var lgini 		"GINI index of pixels"
	order year iso3v10 lgini
	sort iso3v10 year		

	tempfile gini_merge
	save `gini_merge'

restore

merge 1:1 iso3 year using `gini_merge'	
*_merge==1 for 11 territories: Channel Islands, Curacao, Hong Kong, Isle of Man, Kosovo, Palau, St Martin x 2, South Sudan, Timor-Leste, West Bank and Gaza 
*_merge==2 for 28 small territories not in WB dataset
drop if _merge==2 
drop _merge
rename lgini lgini13

//C. Hodler and Raschky QJE 2014

* Downloaded from https://datainspace.org/index.php/global-nighttime-lights-at-adm2-level-1992-2013/ (last accessed on 05/11/2021)

preserve
	clear
	import delimited "${dir}\Data\raw\Nighttime Lights\Hodler and Raschky QJE 2014\Nighttime_Lights_ADM2_1992_2013.csv", encoding(ISO-8859-9) 
	replace countrycode="XKX" if countryname=="Kosovo"
	replace countrycode="ROU" if countryname=="Romania"
	replace countrycode="TLS" if countryname=="East Timor"
	replace countrycode="SRB" if countryname=="Serbia"
	replace countrycode="COD" if countryname=="Democratic Republic of the Congo"
	collapse mean_light, by(countrycode year)
	rename mean_light avdnHR
	tempfile hr_merge
	save `hr_merge'
restore

merge 1:1 countrycode year using `hr_merge'
*there are minor discrepancies in sample composition (merge = 2 for Netherlands Antilles, French Guyana, etc. but this sample also does not include several small countries)
drop if _merge==2 
drop _merge

//D. Pinkovskiy and Sala-i-Martin QJE 2016

* Downloaded from https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/GDFBG1 (last accessed on 05/11/2021)

preserve
	clear
	use "${dir}\Data\raw\Nighttime Lights\Pinkovskiy and Sala-i-Martin QJE 2016\NASM_base2.dta"
	gen countrycode=isocode
	drop if countrycode=="SJM" // small island that is duplicated
	replace countrycode="COD" if countrycode=="ZAR"
	replace countrycode="ROU" if countrycode=="ROM"
	replace countrycode="DEU" if countrycode=="GER"
	keep countrycode year lrgdpchlight
	tempfile ps_merge
	save `ps_merge'
restore
merge 1:1 countrycode year using `ps_merge'
*there are minor discrepancies in sample composition
drop if _merge==2 
drop _merge
		
////////////// 4. combine with replication data from Henderson et al. (AER: 2012)

*The file "global_total_dn_uncal" can be found in a zip file called "hsw_final_tables_replication" available at https://www.openicpsr.org/openicpsr/project/112518/version/V1/view (last accessed on 12/03/2020)

merge 1:1 iso3v10 year using "${dir}\Data\raw\Henderson Storeygard Weil AER 2012\global_total_dn_uncal.dta", keepusing(lndn isonv10 lngdpwdilocal)
*Details on merge:
*_merge==1: HSW data only covers period 1992-2008. 9 countries always missing: Curacao, Hong Kong, Kosovo, Macao, Montenegro, Serbia, Saint Martin (Dutch and French parts), South Sudan. 11 countries (29 ctry-years) with scattered missings (<=6 years missing), including Russia, Canada, Finland, Sweden, Norway, USA due to Northern lights.
*_merge==2 19 small territories not in WB data. They also have no GDP data in HSW
drop if _merge==2 
drop _merge
rename lndn lndnHSW
rename lngdpwdilocal lngdpHSW
bysort countrycode (isonv10) : replace isonv10 = isonv10[_n-1] if missing(isonv10) //fill in the isonv10 code for country-years not in HSW (useful below for IDA)
	
/////////////// 5. Combine with UN data (country classification, latitude/longitude, years of schooling)

*File w/ country classification is available for download from https://unstats.un.org/unsd/methodology/m49/overview/ (last accessed on 12/03/2020)
*File w/ latitude/longitude available for download from https://population.un.org/wup/Download/Files/WUP2018-F13-Capital_Cities.xls (last accessed on 05/10/2021) 
*File w/ mean_yrs_educ available for download from http://hdr.undp.org/en/indicators/103006# (last accessed on 05/12/2021)
	
preserve
	clear
	import excel "${dir}\Data\raw\United Nations\UNSD — Methodology.xlsx", sheet("Sheet1") firstrow
	keep RegionCode RegionName SubregionCode SubregionName IntermediateRegionCode IntermediateRegionName ISOalpha3Code LeastDevelopedCountriesLDC DevelopedDevelopingCountries
	gen subsubregcode_un=IntermediateRegionCode
	replace subsubregcode_un=SubregionCode if subsubregcode_un==.&SubregionCode!=.
	gen subsubregname_un=IntermediateRegionName
	replace subsubregname_un=SubregionName if subsubregname_un==""&SubregionName!=""
	drop RegionCode RegionName SubregionCode SubregionName IntermediateRegionCode IntermediateRegionName 
	rename ISOalpha3Code countrycode
	tempfile un_merge
	save `un_merge'
restore

merge m:1 countrycode using `un_merge'	
drop if _merge==2 //33 small islands in UN data
drop _merge
		
preserve
	clear
	import excel "${dir}\Data\raw\United Nations\WUP2018-F13-Capital_Cities.xlsx", sheet("Data") cellrange(A17:J258) firstrow clear
	*country codes
	kountry Countrycode , from(iso3n) to(iso3c)
	rename _ISO3C_ cntcode3
	kountry Countryorarea , from(other) stuck
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	replace cntcode3=_ISO3C_ if cntcode3==""&_ISO3C_!=""
	drop _ISO*
	replace cntcode3="CHI" if Countryorarea=="Channel Islands"
	replace cntcode3="CUW" if Countryorarea=="Curaçao"
	replace cntcode3="IMN" if Countryorarea=="Isle of Man"
	replace cntcode3="MNP" if Countryorarea=="Northern Mariana Islands"
	replace cntcode3="SXM" if Countryorarea=="Sint Maarten (Dutch part)"
	collapse Latitude Longitude,by(cntcode3)	//I am averaging coordinates for a handful of countries with more than one capital
	
	*Rename variables
	rename cntcode3 countrycode
	rename Latitude latitude
	rename Longitude longitude
	drop if countrycode==""

	*create tempfile  for merge
	tempfile unpop_merge
	save `unpop_merge'
restore

merge m:1 countrycode using `unpop_merge'
drop if _merge==2 //33 small islands in UN data
drop _merge

preserve
	clear
	import delimited "${dir}\Data\raw\United Nations\Mean years of schooling (years).csv", varnames(7) rowrange(7)  
	local year 1990
	forvalues x=3(2)61{
		rename v`x' mean_yrs_educ`year'
		local y=`x'+1
		drop v`y'
		local year=`year'+1
	}
	drop hdirank
	kountry country, from(other) stuck
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)

	rename _ISO3C_ countrycode
	replace country=trim(country)
	replace countrycode="BOL" if country=="Bolivia (Plurinational State of)"
	replace countrycode="CPV" if country=="Cabo Verde"
	replace countrycode="CZE" if country=="Czechia"
	replace countrycode="CIV" if country=="Côte d'Ivoire"
	replace countrycode="SWZ" if country=="Eswatini (Kingdom of)"
	replace countrycode="HKG" if country=="Hong Kong, China (SAR)"
	replace countrycode="MKD" if country=="North Macedonia"
	replace countrycode="PSE" if country=="Palestine, State of"
	replace countrycode="VEN" if country=="Venezuela (Bolivarian Republic of)"
	drop if countrycode==""	//aggregates
	drop _ISO* country
	reshape long mean_yrs_educ,i(country) j(year)
	replace mean_yrs_educ="" if mean_yrs_educ==".."
	destring mean_yrs_educ,replace
	tempfile uneduc_merge
	save `uneduc_merge'
restore
merge 1:1 countrycode year using `uneduc_merge' //28 missings in UN education data
drop _merge

/////////////// 6. Combine with data on political institutions by Cheibub et al. (2010):

*The file is available for download from https://sites.google.com/site/joseantoniocheibub/datasets/democracy-and-dictatorship-revisited (last accessed on 05/05/2021)

preserve
	use "${dir}\Data\raw\Cheibub et al\ddrevisited_data_v1",clear

	*ISO3C code
	kountry ctryname, from(other) stuck
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	drop _ISO3N_1
	rename _ISO3C_ cntcode3
	
	keep if year>=1992 // I am only merging starting in 1992 to avoid dealing with country changes before then

	replace cntcode3 = "CZE" if ctryname == "Czechoslovakia"
	replace cntcode3 = "COD" if ctryname == "Democratic Republic of the Congo (Zaire, Congo-Kinshasha)"
	replace cntcode3 = "COG" if ctryname == "Congo (Brazzaville, Republic of Congo)"
	
	rename cntcode3 countrycode
	rename ctryname countryname	

	*create tempfile  for merge
	tempfile cheibub_merge
	save `cheibub_merge'
restore

merge 1:1 countrycode year using `cheibub_merge', keepusing(regime democracy)
*Details on merge:
*_merge==1: Cheibub et al data only covers period until 2008. Some small countries are missing, including Curacao, Hong Kong, Kosovo, Macao, Montenegro, Saint Martin (Dutch and French parts), South Sudan. 
drop _merge
gen autocracyDD=1-democracy
drop democracy
rename regime regimeDD 

/////////////// 7. Combine with data on political institutions from Polity V:

*The file is available for download from http://www.systemicpeace.org/inscr/p5v2018.xls (last accessed on 05/06/2021)

preserve
	clear
	import excel "${dir}\Data\raw\Polity V\p5v2018.xlsx", sheet("p5v2018") firstrow
	keep scode country year democ autoc polity2 //xrreg xrcomp xropen xconst parreg parcomp 
	keep if year>=1992 // I am only merging starting in 1992 to avoid dealing with country changes before then
	replace country=trim(country)
	kountry country, from(other) stuck
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	drop _ISO3N_1
	rename _ISO3C_ cntcode3
	replace cntcode3 = "CZE" if country == "Czechoslovakia"
	replace cntcode3 = "COG" if country == "Congo-Brazzaville"
	replace cntcode3 = "XKX" if country == "Kosovo"
	replace cntcode3 = "SDN" if country == "Sudan-North"
	rename cntcode3 countrycode
	rename country countryname	
	drop if countryname=="Serbia"&year==2006
	drop if countryname=="Sudan-North"&year==2011
	drop if scode=="ETI"&year==1993
	drop scode
	*create tempfile  for merge
	tempfile polity_merge
	save `polity_merge'
restore

merge 1:1 countrycode year using `polity_merge'
*Details on merge:
*_merge==1: I didn't merge Polity before 1992. Moreover, small countries with pop<500,000 not included.
drop _merge

foreach x of varlist democ autoc /*xrreg xrcomp xropen xconst parreg parcomp*/{
	recode `x' (-66=.) (-77=.) (-88=.)
}

rename democ democracyP5
rename autoc autocracyP5

///////////////// 8. Combine with data on democracy from Papaioannou and Siourounis (Economic Journal, 2008)  

*the file is available for download from https://docs.google.com/open?id=0B55gY54fT1krRzUtQTJsejEteUk (last accessed on 05/07/2021)

*Add WB governance data
gen wbcode=iso3v10
replace wbcode="ZAR" if wbcode=="COD"
replace wbcode="ROM" if wbcode=="ROU"
merge 1:1 wbcode year using "${dir}\Data\raw\Papaioannou and Siourounis EJ 2008\EJ_democ_data",keepusing(ev_ps1 d_democ)
drop if _merge==2	//Yugoslavia
drop _merge wbcode
egen temp=rowtotal(ev_ps1 d_democ),miss //these dummies add up to countries that either were always democratic or transitioned to democracy
gen autocracyPS=1-temp 
drop ev_ps1 d_democ temp

///////////////// 9. Combine with data on democracy from Acemoglu et al (Journal of Political Economy, 2019)  

*the file is available for download from https://economics.mit.edu/faculty/acemoglu/data/ddcg (last accessed on 06/24/2021)

*Add WB governance data
gen wbcode=iso3v10
replace wbcode="ZAR" if wbcode=="COD"
replace wbcode="ROM" if wbcode=="ROU"
replace wbcode="UAE" if wbcode=="ARE"
replace wbcode="NAU" if wbcode=="NRU"
replace wbcode="SIN" if wbcode=="SGP"
merge 1:1 wbcode year using "${dir}\Data\raw\Acemoglu et al JPE 2019\DDCGdata_final.dta",keepusing(dem)
drop if _merge==2	//Serbia and Montenegro, Taiwan
gen autocracyANRR=1-dem
drop _merge wbcode dem

///////////////// 10. Combine with data on state capacity from Chong et al. (Journal of the European Economic Association, 2014)

*the file is available for download from https://scholar.harvard.edu/shleifer/publications/letter-grading-government-efficiency (last accessed on 05/13/2021)

preserve
	clear
	import excel "${dir}\Data\raw\Chong et al JEEA 2014\maildata_fls_jeea_sep20_2013.xlsx", sheet("maildata_fls_sep20_2013") firstrow
	keep code date* maximum
	drop date_sent date_ret
	gen returned_days=datef_ret-datef_sent	//days it took for letter to get back
	replace returned_days=maximum if returned_days==.	//some letters were not returned before endline measurement
	gen Dreturned_any=1-missing(datef_ret) //dummy for letter back
	gen Dreturned_90=returned_days<=90 //dummy for letter back in <= 90 days
	collapse returned_days Dreturned_any Dreturned_90,by(code) //these figures match the ones reported in the data appendix that can be found in the folder
	rename code countrycode
	replace countrycode="ROU" if countrycode=="ROM"
	replace countrycode="COD" if countrycode=="ZAR"
	replace countrycode="XKX" if countrycode=="KSV"
	tempfile chong_merge
	save `chong_merge'
restore
merge m:1 countrycode using `chong_merge'
drop _merge

///////////////// 11. Combine with data on corruption and transparency

//A. CPI by Transparency International available for download from https://images.transparencycdn.org/images/CPI_FULL_DATA_2021-01-27-162209.zip (last accessed on 05/13/2021)

preserve
	clear
	import excel "${dir}\Data\raw\Transparency International\CPI2020_GlobalTablesTS_210125.xlsx", sheet("CPI Timeseries 2012 - 2020") cellrange(A3:AH183) firstrow
	keep ISO3 CPI*
	rename CPIScore2013 CPIscore2013
	rename CPIScore2012 CPIscore2012
	reshape long CPIscore,i(ISO3) j(year)
	rename ISO3 countrycode
	rename CPI cpi
	collapse cpi,by(countrycode)
	replace countrycode="XKX" if countrycode=="KSV"
	tempfile cpi_merge
	save `cpi_merge'
restore
merge m:1 countrycode using `cpi_merge'
drop _merge 

//B. CCI and VA from WB Worldwide Governance Indicators available for download from https://datacatalog.worldbank.org/dataset/worldwide-governance-indicators (last accessed on 05/14/2021)

preserve
	clear
	import excel "${dir}\Data\raw\WB Worldwide Governance Indicators\WGIEXCEL.xlsx", sheet("Data") firstrow
	keep if IndicatorCode=="CC.EST"
	drop CountryName Indicator*
	rename Country countrycode
	rename E cci1996
	rename F cci1998
	rename G cci2000
	local year 1995	//because loops starts at A and I want H to coincide with 2003 (survey only covers even years initially)
	foreach x in `c(ALPHA)'{
		cap rename `x' cci`year'
		local year=`year'+1
	}
	reshape long cci,i(countrycode) j(year)
	tempfile cci_merge
	save `cci_merge'
	clear
	import excel "${dir}\Data\raw\WB Worldwide Governance Indicators\WGIEXCEL.xlsx", sheet("Data") firstrow
	keep if IndicatorCode=="VA.EST"
	drop CountryName Indicator*
	rename Country countrycode
	rename E va1996
	rename F va1998
	rename G va2000
	local year 1995	//because loops starts at A and I want H to coincide with 2003 (survey only covers even years initially)
	foreach x in `c(ALPHA)'{
		cap rename `x' va`year'
		local year=`year'+1
	}
	reshape long va,i(countrycode) j(year)
	merge 1:1 countrycode year using `cci_merge'
	drop _merge
	tempfile wgi_merge
	save `wgi_merge'
restore

merge 1:1 countrycode year using `wgi_merge'
drop if _merge==2 //Anguilla, Neteherlands Antilles, Cook, French Guiana, Jersey, Martinique, Niue, Reunion
drop _merge 

/////////////////// 12. Combine with data on political institutions from IAEP

*File is available for download from https://havardhegre.net/iaep/ (last accessed on 05/14/2021)

preserve
	use "${dir}\Data\raw\Institutions and Elections Project\IAEPv2_0_2015.dta", clear
	kountry cname, from(other) stuck
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	rename _ISO3C_ countrycode
	replace countrycode="CZE" if country=="Czechoslovakia"
	drop if countrycode==""	//East Germany and couple others, all before 1992
	keep countrycode year leg legcham elecleg elecexec suffreg banned banall stateparty execveto legveto removeleg removeexec execforce exectax execbudget constage court govstruct regstruct
	tempfile iaep_merge
	save `iaep_merge'
restore
merge 1:1 countrycode year using `iaep_merge'
drop if _merge==2 // Taiwan
drop _merge
sort countrycode year

////////////////////// 13. Combine with data on SDDS from the IMF

*The raw data file is a copy-paste of the information available in the DSBB website: https://dsbb.imf.org/sdds/subscription-date (last accessed on 05/17/2021)
*The raw data file for SDDS Plus is a copy-paste of the information available in the DSBB website: https://dsbb.imf.org/sdds-plus/subscription-date (last accessed on 05/17/2021)
*Missing data for SDDS Plus countries imputed by hand based on the reports available in https://dsbb.imf.org/sdds-plus/year/2006/annual-observance-reports-list (last accessed on 05/17/2021)
*I decided to not keep the post variable because that one is hard to identify for the SDDSplus countries

preserve
	use "${dir}\Data\raw\SDDS\sdds_raw.dta",clear
	append using "${dir}\Data\raw\SDDS\sdds_plus_raw.dta"
	gen date_subs=date( dateofsubscription,"DMY")
	format date_subs %td
	gen sdds_subs=yofd(date_subs)
	gen date_met=date(datewhensubscribermetsddsspecifi,"DMY")
	format date_met %td
	gen sdds_met=yofd(date_met)
	gen date_plus=date(dateofadherence,"DMY")
	format date_plus %td
	gen sdds_plus=yofd(date_plus)
	keep country sdds*
	kountry country, from(other) stuck
	rename _ISO3N_ _ISO3N_1
	kountry _ISO3N_1, from(iso3n) to(iso3c)
	rename _ISO3C_ countrycode
	replace countrycode="BLR" if country=="Belarus, Republic of"
	replace countrycode="CHN" if country=="China, People’s Republic"
	replace countrycode="HKG" if country=="China, P.R.: Hong Kong"
	replace countrycode="MKD" if country=="North Macedonia, Republic of"	
	drop _ISO* country
	*Austria https://dsbb.imf.org/content/pdfs/AnnualReports/2006/AUT_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="AUT"
	replace sdds_met=2001 if countrycode=="AUT"
	*Brazil https://dsbb.imf.org/content/pdfs/AnnualReports/2006/BRA_SDDS_AR2006.pdf
	replace sdds_sub=2001 if countrycode=="BRA"
	replace sdds_met=2001 if countrycode=="BRA"
	*Bulgaria https://dsbb.imf.org/content/pdfs/AnnualReports/2006/BGR_SDDS_AR2006.pdf
	replace sdds_sub=2003 if countrycode=="BGR"
	replace sdds_met=2003 if countrycode=="BGR"
	*Canada https://dsbb.imf.org/content/pdfs/AnnualReports/2006/CAN_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="CAN"
	replace sdds_met=1999 if countrycode=="CAN"
	*Czech Republic https://dsbb.imf.org/content/pdfs/AnnualReports/2006/CZE_SDDS_AR2006.pdf
	replace sdds_sub=1998 if countrycode=="CZE"
	replace sdds_met=1999 if countrycode=="CZE"
	*Chile https://dsbb.imf.org/content/pdfs/AnnualReports/2006/CHL_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="CHL"
	replace sdds_met=2000 if countrycode=="CHL"
	*Denmark https://dsbb.imf.org/content/pdfs/AnnualReports/2006/DNK_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="DNK"
	replace sdds_met=2000 if countrycode=="DNK"
	*Finland https://dsbb.imf.org/content/pdfs/AnnualReports/2006/FIN_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="FIN"
	replace sdds_met=2000 if countrycode=="FIN"
	*France https://dsbb.imf.org/content/pdfs/AnnualReports/2006/FRA_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="FRA"
	replace sdds_met=2001 if countrycode=="FRA"
	*Germany https://dsbb.imf.org/content/pdfs/AnnualReports/2006/DEU_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="DEU"
	replace sdds_met=2000 if countrycode=="DEU"
	*Israel https://dsbb.imf.org/content/pdfs/AnnualReports/2006/ISR_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="ISR"
	replace sdds_met=2000 if countrycode=="ISR"
	*Italy https://dsbb.imf.org/content/pdfs/AnnualReports/2006/ITA_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="ITA"
	replace sdds_met=2000 if countrycode=="ITA"
	*Japan https://dsbb.imf.org/content/pdfs/AnnualReports/2006/JPN_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="JPN"
	replace sdds_met=2000 if countrycode=="JPN"
	*Latvia https://dsbb.imf.org/content/pdfs/AnnualReports/2006/LVA_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="LVA"
	replace sdds_met=1999 if countrycode=="LVA"
	*Lithuania https://dsbb.imf.org/content/pdfs/AnnualReports/2006/LTU_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="LTU"
	replace sdds_met=1999 if countrycode=="LTU"
	*Luxembourg https://dsbb.imf.org/content/pdfs/AnnualReports/2006/LUX_SDDS_AR2006.pdf
	replace sdds_sub=2006 if countrycode=="LUX"
	replace sdds_met=2006 if countrycode=="LUX"
	*Netherlands https://dsbb.imf.org/content/pdfs/AnnualReports/2006/NLD_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="NLD"
	replace sdds_met=2000 if countrycode=="NLD"
	*North Macedonia https://dsbb.imf.org/content/pdfs/AnnualReports/2011/MKD_SDDS_AR2011.pdf
	replace sdds_sub=2011 if countrycode=="MKD"
	replace sdds_met=2011 if countrycode=="MKD"
	*Portugal https://dsbb.imf.org/content/pdfs/AnnualReports/2006/PRT_SDDS_AR2006.pdf
	replace sdds_sub=1997 if countrycode=="PRT"
	replace sdds_met=2000 if countrycode=="PRT"
	*Romania https://dsbb.imf.org/content/pdfs/AnnualReports/2006/ROU_SDDS_AR2006.pdf
	replace sdds_sub=2005 if countrycode=="ROU"
	replace sdds_met=2005 if countrycode=="ROU"
	*Slovakia https://dsbb.imf.org/content/pdfs/AnnualReports/2006/SVK_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="SVK"
	replace sdds_met=1999 if countrycode=="SVK"
	*Slovenia https://dsbb.imf.org/content/pdfs/AnnualReports/2006/SVN_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="SVN"
	replace sdds_met=2000 if countrycode=="SVN"
	*Spain https://dsbb.imf.org/content/pdfs/AnnualReports/2006/ESP_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="ESP"
	replace sdds_met=2000 if countrycode=="ESP"
	*Sweden https://dsbb.imf.org/content/pdfs/AnnualReports/2006/SWE_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="SWE"
	replace sdds_met=2000 if countrycode=="SWE"
	*Switzerland https://dsbb.imf.org/content/pdfs/AnnualReports/2006/CHE_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="CHE"
	replace sdds_met=2001 if countrycode=="CHE"
	*United States https://dsbb.imf.org/content/pdfs/AnnualReports/2006/USA_SDDS_AR2006.pdf
	replace sdds_sub=1996 if countrycode=="USA"
	replace sdds_met=1999 if countrycode=="USA"
	tempfile sdds_merge
	save `sdds_merge'
restore
merge m:1 countrycode using `sdds_merge'
drop _merge

foreach x of varlist sdds_subs sdds_plus sdds_met{
	rename `x' temp
	gen `x'=(year>=temp)
	drop temp
}

/////////////////// 14. Combine with data on IDA cut-offs from Galiani et al

*Replication files for Galiani et al. (Journal of Economic Growth 2017) are available for download from http://econweb.umd.edu/~galiani/data.html (last accessed on 05/18/2021)
*This dataset only goes until 2010, so I fill in by hand for 2011-2013 using various sources
*Footnote 2 here shows that the IDA operational threshold for FY XX is based on GNI per capita two years before: http://documents1.worldbank.org/curated/en/287661468782159368/pdf/264980IDA0eligibility035.pdf
*This IMF document states a cutoff of 1135 for FY 2010 in line with Galiani's value for 2008: https://www.imf.org/external/np/pp/eng/2010/011110.pdf 
*This IMF document states a cutoff of 1195 for FY 2013, which I use for 2011: https://www.imf.org/external/np/pp/eng/2013/031813a.pdf
*This document states a cutoff of 1205 for FY 2014 based on 2012 GNI: https://www.climateinvestmentfunds.org/sites/cif_enc/files/Criteria%20for%20new%20SREP%20pilot%20countries_final_2-26-2014.pdf
*This IMF document states a cutoff of 1215 for FY 2015, which I use for 2013: https://www.imf.org/external/np/pp/eng/2015/062415.pdf

merge m:1 year using "${dir}\Data\raw\Galiani et al JEG 2017\IDA_cutoffs.dta"
drop _merge
replace ida_cutoff=1195 if year==2011
replace ida_cutoff=1205 if year==2012
replace ida_cutoff=1215 if year==2013

/////////////////// 15. Construct IDA crossings

*IDA countries
*hand-coded based on information in https://ida.worldbank.org/about/borrowing-countries and https://ida.worldbank.org/about/ida-graduates (last accessed 05/18/2021)

gen ida_ctry=0
replace ida_ctry =1 if isonv10==204 //Benin
replace ida_ctry =1 if isonv10==854 //Burkina Faso
replace ida_ctry =1 if isonv10==108 //Burundi
replace ida_ctry =1 if isonv10==120 //Cameroon
replace ida_ctry =1 if isonv10==132 //Cape Verde
replace ida_ctry =1 if isonv10==140 //Central African Republic
replace ida_ctry =1 if isonv10==148 //Chad
replace ida_ctry =1 if isonv10==174 //Comoros
replace ida_ctry =1 if isonv10==180 //DRC
replace ida_ctry =1 if isonv10==178 //Congo
replace ida_ctry =1 if isonv10==384 //Ivory Coast
replace ida_ctry =1 if isonv10==232 //Eritrea
replace ida_ctry =1 if isonv10==231 //Ethiopia
replace ida_ctry =1 if isonv10==270 //Gambia
replace ida_ctry =1 if isonv10==288 //Ghana
replace ida_ctry =1 if isonv10==324 //Guinea
replace ida_ctry =1 if isonv10==624 //Guinea Bissau
replace ida_ctry =1 if isonv10==404 //Kenya
replace ida_ctry =1 if isonv10==426 //Lesotho
replace ida_ctry =1 if isonv10==430 //Liberia
replace ida_ctry =1 if isonv10==450 //Madagascar
replace ida_ctry =1 if isonv10==454 //Malawi
replace ida_ctry =1 if isonv10==466 //Mali
replace ida_ctry =1 if isonv10==478 //Mauritania
replace ida_ctry =1 if isonv10==508 //Mozambique
replace ida_ctry =1 if isonv10==562 //Niger
replace ida_ctry =1 if isonv10==566 //Nigeria
replace ida_ctry =1 if isonv10==646 //Rwanda
replace ida_ctry =1 if isonv10==678 //Sao Tome
replace ida_ctry =1 if isonv10==686 //Senegal
replace ida_ctry =1 if isonv10==694 //Sierra Leone
replace ida_ctry =1 if isonv10==706 //Somalia
replace ida_ctry =1 if isonv10==736 //Sudan	//OJO: South Sudan too
replace ida_ctry =1 if isonv10==834 //Tanzania
replace ida_ctry =1 if isonv10==768 //Togo
replace ida_ctry =1 if isonv10==800 //Uganda
replace ida_ctry =1 if isonv10==894 //Zambia
replace ida_ctry =1 if isonv10==716 //Zimbabwe
replace ida_ctry =1 if isonv10==116 //Cambodia
replace ida_ctry =1 if isonv10==242 //Fiji
replace ida_ctry =1 if isonv10==296 //Kiribati
replace ida_ctry =1 if isonv10==418 //Lao
replace ida_ctry =1 if isonv10==584 //Marshall
replace ida_ctry =1 if isonv10==583 //Micronesia
replace ida_ctry =1 if isonv10==104 //Myanmar
replace ida_ctry =1 if isonv10==598 //Papua
replace ida_ctry =1 if isonv10==882 //Samoa
replace ida_ctry =1 if isonv10==90 //Solomon islands
replace ida_ctry =1 if isonv10==626 //Timor Leste
replace ida_ctry =1 if isonv10==776 //Tonga
replace ida_ctry =1 if isonv10==798 //Tuvalu
replace ida_ctry =1 if isonv10==548 //Vanuatu
replace ida_ctry =1 if isonv10==4 //Afghanistan
replace ida_ctry =1 if isonv10==50 //Bangladesh
replace ida_ctry =1 if isonv10==64 //Bhutan
replace ida_ctry =1 if isonv10==462 //Maldives
replace ida_ctry =1 if isonv10==524 //Nepal
replace ida_ctry =1 if isonv10==586 //Pakistan
replace ida_ctry =1 if countrycode=="XKX" //Kosovo
replace ida_ctry =1 if isonv10==417 //Kyrgystan
replace ida_ctry =1 if isonv10==762 //Tajikistan
replace ida_ctry =1 if isonv10==860 //Uzbekistan
replace ida_ctry =1 if isonv10==212 //Dominica
replace ida_ctry =1 if isonv10==670 //St Vincent
replace ida_ctry =1 if isonv10==308 //Grenada
replace ida_ctry =1 if isonv10==328 //Guyana
replace ida_ctry =1 if isonv10==332 //Haiti
replace ida_ctry =1 if isonv10==340 //Honduras
replace ida_ctry =1 if isonv10==558 //Nicaragua
replace ida_ctry =1 if isonv10==662 //St Lucia
replace ida_ctry =1 if isonv10==262 //Djibouti
*replace ida_ctry =1 if isonv10==760 //Syria only returned in 2017 and has been always frozen
replace ida_ctry =1 if isonv10==887 //Yemen
replace ida_ctry =1 if isonv10==8 //Albania
replace ida_ctry =1 if isonv10==24 //Angola
replace ida_ctry =1 if isonv10==51 //Armenia
replace ida_ctry =1 if isonv10==31 //Azerbaijan
replace ida_ctry =1 if isonv10==68 //Bolivia
replace ida_ctry =1 if isonv10==70 //Bosnia
replace ida_ctry =1 if isonv10==156 //China
replace ida_ctry =1 if isonv10==818 //Egypt	
replace ida_ctry =1 if isonv10==226 //Equatorial Guinea	
replace ida_ctry =1 if isonv10==268 //Georgia
replace ida_ctry =1 if isonv10==356 //India
replace ida_ctry =1 if isonv10==360 //Indonesia
replace ida_ctry =1 if isonv10==498 //Moldova
replace ida_ctry =1 if isonv10==496 //Mongolia	
replace ida_ctry =1 if countrycode=="MNE" //Montenegro
replace ida_ctry =1 if isonv10==807 //North Macedonia	
replace ida_ctry =1 if isonv10==608 //Philippines
replace ida_ctry =1 if countrycode=="SRB" //Serbia
replace ida_ctry =1 if isonv10==144 //Sri Lanka
replace ida_ctry =1 if isonv10==659 //St Kitts
replace ida_ctry =1 if isonv10==704 //Vietnam

*IDA crossing: To count as crossing, (i) I must observe at least three years before since 1992, (ii) at least 75% and at least 5 WDI vintages must agree 

*Temporary variables used to establish crossings
foreach x of numlist 1989 1990 1991 1992 1998 2000 2002 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020{
	gen cross_`x'=ny_gnp_pcap_cd_v`x'>ida_cutoff if ny_gnp_pcap_cd_v`x'!=.&ida_ctry==1
}
egen crossings=rowtotal(cross_*),miss
egen obs=rownonmiss(cross_*)
gen sh_cross=crossings/obs

gen ida_cross=0
gen ida_cross_g=0

*Albania
replace ida_cross=1 if isonv10==8&year==1999
replace ida_cross_g=1 if isonv10==8&year==1999
*Angola
replace ida_cross=1 if isonv10==24&year==2005
replace ida_cross_g=1 if isonv10==24&year==2005
*Armenia
replace ida_cross=1 if isonv10==51&year==2003
replace ida_cross_g=1 if isonv10==51&year==2003
*Azerbaijan
replace ida_cross=1 if isonv10==31&year==2005
replace ida_cross_g=1 if isonv10==31&year==2005	
*Bosnia: only one obs pre-cross
replace ida_cross_g=1 if isonv10==70&year==1997
*Bhutan
replace ida_cross=1 if isonv10==64&year==2003
replace ida_cross_g=1 if isonv10==64&year==2004
*Bolivia
replace ida_cross=1 if isonv10==68&year==1997
replace ida_cross_g=1 if isonv10==68&year==1997
*Cameroon
replace ida_cross=1 if isonv10==120&year==2011
replace ida_cross_g=1 if isonv10==120&year==2008
*China
replace ida_cross=1 if isonv10==156&year==2000
replace ida_cross_g=1 if isonv10==156&year==2000
*Ivory Coast
replace ida_cross=1 if isonv10==384&year==2010
*Congo
replace ida_cross=1 if isonv10==178&year==2006
replace ida_cross_g=1 if isonv10==178&year==2006
*Djibouti
replace ida_cross_g=1 if isonv10==262&year==2007
*Egypt // only two years below
replace ida_cross_g=1 if isonv10==818&year==1995
*Equatorial Guinea
replace ida_cross=1 if isonv10==226&year==2001
replace ida_cross_g=1 if isonv10==226&year==1998
*Georgia
replace ida_cross=1 if isonv10==268&year==2004
replace ida_cross_g=1 if isonv10==268&year==2003
*Ghana
replace ida_cross=1 if isonv10==288&year==2008
replace ida_cross_g=1 if isonv10==288&year==2009
*Guyana 
replace ida_cross=1 if isonv10==328&year==2002
replace ida_cross_g=1 if isonv10==328&year==1999
*Honduras
replace ida_cross=1 if isonv10==340&year==2001
replace ida_cross_g=1 if isonv10==340&year==2000
*India
replace ida_cross=1 if isonv10==356&year==2010
replace ida_cross_g=1 if isonv10==356&year==2010
*Indonesia
replace ida_cross=1 if isonv10==360&year==1994
replace ida_cross_g=1 if isonv10==360&year==1994
*Laos // not in Galiani
replace ida_cross=1 if isonv10==418&year==2012
*Lesotho // not in Galiani
replace ida_cross=1 if isonv10==426&year==2011
*Moldova
replace ida_cross=1 if isonv10==498&year==2007
replace ida_cross_g=1 if isonv10==498&year==2007
*Maldives
replace ida_cross=1 if isonv10==462&year==1997
*Mongolia
replace ida_cross=1 if isonv10==496&year==2007
replace ida_cross_g=1 if isonv10==496&year==2006
*Mauritania
replace ida_cross=1 if isonv10==478&year==2013
*Nicaragua // not in Galiani
replace ida_cross=1 if isonv10==558&year==2009
*Nigeria
replace ida_cross=1 if isonv10==566&year==2008
replace ida_cross_g=1 if isonv10==566&year==2008
*Pakistan // not in Galiani
replace ida_cross=1 if isonv10==586&year==2013
*Papua New Guinea
replace ida_cross=1 if isonv10==598&year==2009
replace ida_cross_g=1 if isonv10==598&year==2009
*Samoa
replace ida_cross_g=1 if isonv10==882&year==1995
*Solomon Islands
replace ida_cross=1 if isonv10==90&year==2012
replace ida_cross_g=1 if isonv10==90&year==1997
*Sri Lanka
replace ida_cross=1 if isonv10==144&year==2003
replace ida_cross_g=1 if isonv10==144&year==2003
*Sudan
replace ida_cross=1 if isonv10==736&year==2010
replace ida_cross_g=1 if isonv10==736&year==2008
*Sao Tome
replace ida_cross=1 if isonv10==678&year==2011
*Timor-Leste
replace ida_cross=1 if isonv10==626&year==2007
replace ida_cross_g=1 if isonv10==626&year==2006
*Uzbekistan: 
replace ida_cross=1 if isonv10==860&year==2010
replace ida_cross_g=1 if isonv10==860&year==2010
*Vietnam // not in Galiani
replace ida_cross=1 if isonv10==704&year==2011
*Yemen // not in Galiani
replace ida_cross=1 if isonv10==887&year==2010
*Zambia // not in Galiani
replace ida_cross=1 if isonv10==894&year==2012

//dummy for post-cross years
bysort countrycode (year): gen ida_cross_post=sum(ida_cross)

drop cross_* crossings obs sh_cross ny_gnp_pcap_cd_v*

/////////////// 16. create additional variables for analysis

*numeric country code
egen countrycode_num=group(countrycode)

*log GDP
gen lngdp14=ln(ny_gdp_mktp_kn_2014_11)
forvalues x=2014/2021{
	foreach y in 01 02 03 04 05 06 07 08 09 10 11 12{
		cap gen lngdp_v`x'_`y'=ln(ny_gdp_mktp_kn_v`x'_`y')
		cap label var lngdp_v`x'_`y' "log GDP (LCU), WDI `y' `x' vintage"
	}
}
drop ny_gdp_mktp_kn_v*

*log lights
gen lndn18=ln(avdn18)
gen lndnHR=ln(avdnHR)
gen lndn13=ln(avdn13)
gen avdnPS=exp(lrgdpchlight)*sp_pop_totl_2014_11
gen lndnPS=ln(avdnPS)
drop avdn18 avdnHR lrgdpchlight avdnPS

*adjusting lndn18

//keep sample consistent with lndn13
egen temp=mean(lndn13),by(countrycode)
gen in13=(temp<.)
replace in13=. if lndn13==.&year<=2013
drop temp
//yearly change in lights
sort countrycode year
gen d_lndn18=lndn18-lndn18[_n-1] if countrycode==countrycode[_n-1]
//adjusting 2014
egen temp=mean(d_lndn18) if year>=2012&year<=2016&year!=2014,by(countrycode)
egen temp1=mean(temp),by(countrycode)
gen d_lndn18_adj=d_lndn18 if year!=2014
replace d_lndn18_adj=temp1 if year==2014
drop temp*
gen lndn18_adj14=lndn18 if year<=2013&in13==1
replace lndn18_adj14=lndn18_adj14[_n-1]*(1+d_lndn18_adj) if year>=2014
//adjusting 2017 also
egen temp=mean(d_lndn18) if year==2016|year==2018,by(countrycode)
egen temp1 =mean(temp),by(countrycode)
replace d_lndn18_adj=temp1 if year==2017
drop temp*
gen lndn18_adj1417=lndn18 if year<=2013&in13==1
replace lndn18_adj1417=lndn18_adj1417[_n-1]*(1+d_lndn18_adj) if year>=2014
replace lndn18=. if year<=2013&in13!=1
replace lndn18=. if lndn18_adj1417==.
drop in13 d_lndn18*
//adjusting lndn13 in 2010
sort countrycode year
gen d_lndn13=lndn13-lndn13[_n-1] if countrycode==countrycode[_n-1]
egen temp=mean(d_lndn13) if year>=2008&year<=2012&year!=2010,by(countrycode)
egen temp1=mean(temp),by(countrycode)
gen d_lndn13_adj=d_lndn13 if year!=2010
replace d_lndn13_adj=temp1 if year==2010
gen lndn13_adj10=lndn13 if year!=2010
replace lndn13_adj10=lndn13_adj10[_n-1]*(1+d_lndn13_adj) if year==2010
drop temp* d_lndn13*

*Growth rates, lags and first difference of GDP and lights
sort countrycode year
gen dn13_growth=(avdn13-avdn13[_n-1])/avdn13[_n-1] if countrycode==countrycode[_n-1]
gen gdp14_growth=ny_gdp_mktp_kd_zg_2014_11/100
winsor2 gdp14_growth dn13_growth if year>=1992&year<=2013, replace cuts(1 99)

*GNI per capita
gen gni_pc=ny_gnp_pcap_cd_2014_11/1000

*FIW index, discrete measures, fixed measures
gen fiw_unadj= 0.5*(fiw_pr + fiw_cl)
gen fiw=(fiw_unadj-1)
gen fiw2=fiw^2

gen Dfree=(fiw<2) if fiw!=.
gen Dpfree=(fiw>=2&fiw<=4) if fiw!=.
gen Dnfree=(fiw>4) if fiw!=.

egen fiw_avg9213=mean(fiw) if year>=1992&year<=2013,by(countrycode)
egen temp=mean(fiw) if year>=1982&year<=1991,by(countrycode)
egen fiw_avg8291=mean(temp),by(countrycode)
drop temp

gen Dfree_avg9213=(fiw_avg9213<2) if fiw_avg9213!=.
gen Dpfree_avg9213=(fiw_avg9213>=2&fiw_avg9213<=4) if fiw_avg9213!=.
gen Dnfree_avg9213=(fiw_avg9213>4) if fiw_avg9213!=.

*interactions of lights and democracy
gen lndn13_fiw=lndn13*fiw
gen lndn13_Dfree=lndn13*Dfree
gen lndn13_Dpfree=lndn13*Dpfree
gen lndn13_Dnfree=lndn13*Dnfree

*Adjusted regime type: I follow Cheibub et al. but adjust to match Freedom House in case of discrepancy
gen regime_adj=regimeDD
replace regime_adj=0 if countrycode=="BWA"&autocracyFH==0&regimeDD!=.
replace regime_adj=2 if countrycode=="GUY"&autocracyFH==0&regimeDD!=.
replace regime_adj=1 if countrycode=="NAM"&autocracyFH==0&regimeDD!=.
replace regime_adj=0 if countrycode=="WSM"&autocracyFH==0&regimeDD!=.
replace regime_adj=2 if countrycode=="SYC"&autocracyFH==0&regimeDD!=.
replace regime_adj=1 if countrycode=="MOZ"&autocracyFH==0&regimeDD!=.
replace regime_adj=0 if countrycode=="ZAF"&autocracyFH==0&regimeDD!=.
replace regime_adj=3 if countrycode=="ARM"&autocracyFH==1&regimeDD!=.
replace regime_adj=3 if countrycode=="ATG"&autocracyFH==1&regimeDD!=.
replace regime_adj=1 if countrycode=="GEO"&autocracyFH==0&autocracyDD==1
replace regime_adj=3 if countrycode=="GEO"&autocracyFH==1&autocracyDD==0
replace regime_adj=0 if countrycode=="LSO"&autocracyFH==0&regimeDD!=.
replace regime_adj=1 if countrycode=="RUS"&autocracyFH==0&regimeDD!=.
replace regime_adj=1 if countrycode=="HTI"&autocracyFH==0&regimeDD!=.
replace regime_adj=1 if countrycode=="KGZ"&autocracyFH==0&autocracyDD==1
replace regime_adj=3 if countrycode=="KGZ"&autocracyFH==1&autocracyDD==0
replace regime_adj=1 if countrycode=="GNB"&autocracyFH==0&autocracyDD==1
replace regime_adj=3 if countrycode=="GNB"&autocracyFH==1&autocracyDD==0
replace regime_adj=2 if countrycode=="ZMB"&autocracyFH==0&regimeDD!=.
replace regime_adj=2 if countrycode=="KEN"&autocracyFH==0&regimeDD!=.
replace regime_adj=3 if countrycode=="KEN"&autocracyFH==1&regimeDD!=.
replace regime_adj=0 if countrycode=="BIH"&autocracyFH==0&regimeDD!=.
replace regime_adj=1 if countrycode=="CAF"&autocracyFH==0&regimeDD!=.
replace regime_adj=4 if countrycode=="CAF"&autocracyFH==1&regimeDD!=.
replace regime_adj=2 if countrycode=="LBR"&autocracyFH==0&regimeDD!=.
replace regime_adj=3 if countrycode=="LBR"&autocracyFH==1&regimeDD!=.
replace regime_adj=4 if countrycode=="BDI"&autocracyFH==1&autocracyDD==0
replace regime_adj=3 if countrycode=="COM"&autocracyFH==1&autocracyDD==0
replace regime_adj=2 if countrycode=="DJI"&autocracyFH==0&autocracyDD==1
replace regime_adj=2 if countrycode=="GMB"&autocracyFH==0&autocracyDD==1
replace regime_adj=3 if countrycode=="GHA"&autocracyFH==1&autocracyDD==0
replace regime_adj=3 if countrycode=="MDA"&autocracyFH==1&autocracyDD==0
replace regime_adj=0 if countrycode=="MNE"&autocracyFH==0&autocracyDD==1
replace regime_adj=3 if countrycode=="NGA"&autocracyFH==1&autocracyDD==0
replace regime_adj=3 if countrycode=="SLB"&autocracyFH==1&autocracyDD==0
replace regime_adj=5 if countrycode=="BTN"&autocracyFH==1&autocracyDD==0
replace regime_adj=3 if countrycode=="DOM"&autocracyFH==1&autocracyDD==0
replace regime_adj=2 if countrycode=="ECU"&autocracyFH==0&autocracyDD==1
replace regime_adj=0 if countrycode=="FJI"&autocracyFH==0&autocracyDD==1
replace regime_adj=3 if countrycode=="PHL"&autocracyFH==1&autocracyDD==0
replace regime_adj=3 if countrycode=="UKR"&autocracyFH==1&autocracyDD==0
replace regime_adj=0 if countrycode=="BGD"&autocracyFH==0&autocracyDD==1
replace regime_adj=3 if countrycode=="MDV"&autocracyFH==1&autocracyDD==0
replace regime_adj=5 if countrycode=="NPL"&autocracyFH==1&autocracyDD==0
replace regime_adj=1 if countrycode=="NER"&autocracyFH==0&autocracyDD==1
replace regime_adj=4 if countrycode=="PAK"&autocracyFH==1&autocracyDD==0
replace regime_adj=4 if countrycode=="PRY"&autocracyFH==1&autocracyDD==0	
replace regime_adj=4 if countrycode=="THA"&autocracyFH==1&autocracyDD==0
replace regime_adj=3 if countrycode=="VEN"&autocracyFH==1&autocracyDD==0
replace regime_adj=1 if regime_adj==2
replace regime_adj=2 if regime_adj==3
replace regime_adj=3 if regime_adj==4|regime_adj==5

*adjusting GDP components
gen cons_gdp=ne_con_petc_zs_2014_11/100 
gen inv_gdp=ne_gdi_totl_zs_2014_11/100
gen gov_gdp=ne_con_govt_zs_2014_11/100
gen exp_gdp=ne_exp_gnfs_zs_2014_11/100
gen imp_gdp=ne_imp_gnfs_zs_2014_11/100

foreach x in cons inv gov exp imp{
	gen ln`x'=ln(1+(`x'_gdp*ny_gdp_mktp_kn_2014_11))
}

//long-run measures: DMSP13

*GDP
egen temp=mean(ny_gdp_mktp_kn_2014_11) if year==1992|year==1993,by(countrycode)
egen temp1=mean(ny_gdp_mktp_kn_2014_11) if year==2012|year==2013,by(countrycode)
egen temp2=rowtotal(temp temp1),missing
gen lngdp14_lr=ln(temp2)
drop temp*
*FIW
egen temp=mean(fiw) if year==1992|year==1993,by(countrycode)
egen temp1=mean(fiw) if year==2012|year==2013,by(countrycode)
egen fiw_lr=rowtotal(temp temp1),missing
egen temp2=mean(temp),by(countrycode)
egen temp3=mean(temp1),by(countrycode)
gen d_fiw_lr9213=temp3-temp2
drop temp*
*DN
egen temp=mean(avdn13) if year==1992|year==1993,by(countrycode)
egen temp1=mean(avdn13) if year==2012|year==2013,by(countrycode)
egen temp2=rowtotal(temp temp1),missing
gen lndn13_lr=ln(temp2)
drop temp*
*Consumption
gen cons=cons_gdp*ny_gdp_mktp_kn_2014_11
egen temp=mean(cons) if year==1992|year==1993,by(countrycode)
egen temp1=mean(cons) if year==2012|year==2013,by(countrycode)
egen temp2=rowtotal(temp temp1),missing
gen lncons13_lr=ln(temp2)
drop temp* cons

*Finding low growth years
xtset countrycode_num year
xtreg lndn13 i.year,fe cluster(countrycode)	//regression of log lights on country and year fixed effects
predict lndn13_hat, xbu	//predicted lights by country and year
gen low=(lndn13<lndn13_hat) if lndn13!=.&lndn13_hat!=.	//we define as low growth if the actual value is below the prediction based on year average
drop lndn13_hat
xtset,clear

/////////////// 17. add variable labels

label var sci_agcen "Agriculture census conducted within last 10 years"
label var sci_excncpt "Balance of payments manual in use, fifth or sixth edition"
label var sci_exdebt "External debt reporting status, actual or preliminary"
label var sci_fscov "Consolidated central government accounts"
label var sci_gdp "Periodicity of GDP growth indicator"
label var sci_gender "Periodicity of gender equality in education indicator" 
label var sci_hiv "Periodicity of HIV/AIDS indicator"
label var sci_hlthsurv "3+ health surveys conducted within last 10 years (DHS, etc)"
label var sci_immun "Periodicity of Immunization indicator"
label var sci_impexp "Import/export prices, monthly or quarterly availability"
label var sci_indust "Industrial production index, monthly or quarterly availability" 
label var sci_iq_sci_mthd "Statistical methodology score"
label var sci_iq_sci_ovrl "Overall statistical capacity score"
label var sci_iq_sci_prdc "Periodicity and Timeliness score"
label var sci_iq_sci_srce "Source data score"
label var sci_malnut "Periodicity of child malnutrition indicator" 
label var sci_matern "Periodicity of maternal health indicator"
label var sci_mortal "Periodicity of child mortality indicator"
label var sci_nabase "National accounts base year within last 10 years or annual chain linking"
label var sci_popcen "Population census conducted within last 10 years" 
label var sci_popreg "Complete vital registration system"
label var sci_poverty "Periodicity of income poverty indicator"
label var sci_povsurv "3+ poverty surveys conducted within last 10 years (IES, LSMS, etc.)"
label var sci_prcpbase "CPI base year within last 10 years or annual chain linking"
label var sci_primcomp "Periodicity of primary completion indicator"
label var sci_sdds "Subscribed to SDDS"
label var sci_unesco "Annual or missed reporting to UNESCO only once in the last 4 years"
label var sci_water "Periodicity of access to water indicator"
label var sci_who "Reported data on measles vaccine coverage consistent w/ WHO for 4 years"
label var ag_lnd_agri_zs_2014_11 "Agricultural land (% of land area)"
label var ne_con_govt_zs_2014_11 "General government final consumption expenditure (% of GDP)" 
label var ne_con_petc_zs_2014_11 "Household final consumption expenditure, etc. (% of GDP)"
label var ne_exp_gnfs_zs_2014_11 "Exports of goods and services (% of GDP)"
label var ne_gdi_totl_zs_2014_11 "Gross capital formation (% of GDP)"
label var ne_imp_gnfs_zs_2014_11 "Imports of goods and services (% of GDP)" 
label var nv_agr_totl_zs_2014_11 "Agriculture, value added (% of GDP)" 
label var nv_ind_manf_zs_2014_11 "Manufacturing, value added (% of GDP)"
label var nv_ind_totl_zs_2014_11 "Industry, value added (% of GDP)"
label var nv_srv_tetc_zs_2014_11 "Services, etc., value added (% of GDP)"
label var ny_gdp_pcap_kd_2014_11 "GDP per capita (constant 2005 US$)"
label var ny_gdp_petr_rt_zs_2014_11 "Oil rents (% of GDP)"
label var ny_gdp_totl_rt_zs_2014_11 "Total natural resources rents (% of GDP)"
label var ny_gnp_pcap_cd_2014_11 "GNI per capita, Atlas method (current US$)" 
label var se_prm_nenr_2014_11 "School enrollment, primary (% net)" 
label var sp_dyn_imrt_in_2014_11 "Mortality rate, infant (per 1,000 live births)"
label var sp_dyn_le00_in_2014_11 "Life expectancy at birth, total (years)"
label var sp_urb_totl_in_zs_2014_11 "Urban population (% of total)"
label var eg_elc_accs_zs			"Access to electricity (% of population)"
label var eg_elc_accs_ur_zs			"Access to electricity (% of urban population)"
label var eg_elc_accs_ru_zs			"Access to electricity (% of rural population)"
label var ny_gdp_mktp_kn_2014_11 	"GDP (constant local currency)" 
label var ny_gdp_pcap_kn_2014_11 	"GDP per capita (constant local currency)"
label var ny_gdp_mktp_kd_zg_2014_11	"GDP growth in constant LCU"
label var gdp14_growth				"GDP growth in constant LCU/100, winsorized 1-99"
label var gni_pc					"GNI per capita (1000s of current USD, Atlas method)"
label var dn13_growth				"DN13 growth, winsorized 1-99"
label var sp_pop_totl_2014_11 		"Population"
label var ag_lnd_totl_k2_2014_11 	"Land area in sq. km"
label var eg_use_elec_kh_2014_11 "Electricity consumption (KwH)"
label var fiw_pr 			"Freedom House - Political Rights"
label var fiw_cl 			"Freedom House - Civil Liberties"
label var lndnHSW			"log DN from Henderson, Storeygard and Weil"
label var lngdpHSW			"log GDP (constant LCU) from Henderson, Storeygard and Weil"
label var lngdp14			"log GDP (constant LCU) using ny_gdp_mktp_kn_2014_11"
label var lndn18			"log DN using DN from Li et al - 92-13: DMSP, 14-18: VIIRS"
label var lndn18_adj14		"lndn18 with 2014 imputed"
label var lndn18_adj1417	"lndn18 with 2014 and 2017 imputed"
label var lndn13_adj10		"lndn13 with 2010 imputed"
label var lndnHR 			"log DN from Hodler and Raschky"
label var lndn13			"log lights from avdn13" 
label var lndnPS			"log lights from avdnPS"
label var count_lights 		"Total number of pixels - south of the Arctic Circle"
label var fiw_unadj			"Original FH FIW score (1-7)"
label var fiw				"Adjusted FH FIW score (=fh_score-1)"
label var fiw2				"Adjusted FH FIW score squared"
label var Dfree				"Free country (FIW<2)"
label var Dpfree			"Partially Free country (2<=FIW<=4)"
label var Dnfree			"Not Free country (FIW>4)"
label var fiw_avg9213 		"Average adjusted FIW score (1992-2013)"
label var fiw_avg8291 		"Average adjusted FIW score (1982-1991)"
label var Dfree_avg9213 	"Free country (fiw_avg9213<2)"
label var Dpfree_avg9213 	"Partially Free country (2<=fiw_avg9213<=4)"
label var Dnfree_avg9213	"Not Free country (fiw_avg9213>4)"
label var subsubregcode_un	"Subregion code (UN classification)"
label var subsubregname_un	"Subregion name (UN classification)"
label var countrycode_num	"Numerical grouping based on countrycode"
label var cons_gdp			"Final private consumption (% of GDP)" 
label var exp_gdp 			"Total exports (% of GDP)"
label var imp_gdp 			"Total Imports (% of GDP)"
label var gov_gdp 			"Final government spending (% of GDP)"
label var inv_gdp 			"Gross capital formation (% of GDP)"
label var lncons			"Log final private consumption (constant LCU)" 
label var lninv 			"Log Gross capital formation (constant LCU)"
label var lngov				"Log government spending (constant LCU)" 
label var lnexp 			"Log Total exports (constant LCU)"
label var lnimp				"Log Total imports (constant LCU)"
label var polity2			"Polity2 score from Polity V project"
label var democracyP5		"Democracy score from Polity V project"
label var autocracyP5		"Autocracy score from Polity V project"
label var autocracyDD 		"=1-Democracy dummy from Cheibub et al."
label var regimeDD			"Regime classification from Cheibub et al."
label var autocracyFH		"=1-Electoral Democracy dummy from Freedom House"
label var autocracyPS		"Autocracy measure based on Papaioannou and Siourounis"
label var returned_days		"Avg. days to get letter back from Chong et al" 
label var Dreturned_any 	"Share of letters returned from Chong et al"
label var Dreturned_90		"Share of letters returned in <90 days from Chong et al"
label var autocracyANRR		"Autocracy dummy from Acemoglu et al."
label var latitude			"Latitude of capital city"
label var longitude			"Longitude of capital city"
label var mean_yrs_educ		"Average year of schooling from UNDP HDI"
label var cpi				"Corruption Perceptions Index from Transp International"
label var va				"Voice and Accountability index from WGI"
label var cci				"Corruption Control index from WGI"
label var sdds_subs			"Country subscribed to SDDS" 
label var sdds_plus 		"Country subscribed to SDDS Plus"
label var sdds_met			"Country met SDDS specifications"
label var ida_cutoff		"Threshold value of GNI pc (current USD) for IDA elegibility"
label var ida_ctry			"Country was a beneficiary of the IDA between 1992 and 2013"
label var ida_cross 		"Year in which country crosses IDA GNIpc threshold"
label var ida_cross_g 		"Year in which country crosses IDA GNIpc threshold from Galiani et al"
label var ida_cross_post	"Dummy for years after crossing IDA threshold"
label var lndn13_fiw		"Interaction of lndn13 w/ adjusted FIW score" 
label var lndn13_Dfree 		"Interaction of lndn13 w/ Dfree dummy"
label var lndn13_Dpfree 	"Interaction of lndn13 w/ Dpfree dummy"
label var lndn13_Dnfree		"Interaction of lndn13 w/ Dnfree dummy"
label var regime_adj 		"Adjusted regime classification (FH and DD)"
label var lngdp14_lr		"Log GDP (constant LCU), Avg 1992-93 and 2012-13" 
label var fiw_lr 			"FIW score, Avg 1992-93 and 2012-13"
label var d_fiw_lr9213 		"Change in FIW score between 1992 and 2013"
label var lndn13_lr 		"Log DN, Avg 1992-93 and 2012-13"
label var lncons13_lr		"Log private consumption, Avg 1992-93 and 2012-13"
label var low				"=1 if Demeaned Log DN (country and year) < 0"

/////////////// 18. adjust sample composition

//following Henderson et al, I exclude Singapore, Equatorial Guinea and Bahrain from the analysis. I also drop Taiwan, as it has no World Bank data. 
drop if countrycode=="SGP"|countrycode=="GNQ"|countrycode=="BHR"|countrycode=="TWN"

/////////////// 19. save dataset for analysis

compress
save "${dir}\Data\master\Estimations", replace 

/////////////// 20. Minimal version of the estimating dataset to import to R for ZAM-influence:

reghdfe lngdp14 lndn13 fiw fiw2 lndn13_fiw, absorb(countrycode year) cluster(countrycode)
keep if e(sample)
keep lngdp14 lndn13 fiw fiw2 lndn13_fiw countrycode countryname year Dfree_avg9213 Dpfree_avg9213 Dnfree_avg9213
save "${dir}\Data\temp\ZAM_sample_in.dta",replace

////////////// 21. set up files for maps

*The shapefile used for the maps is available for download from https://hub.arcgis.com/datasets/UIA::uia-world-countries-boundaries/about (last accessed 06/27/2021)

clear
cd "${dir}\Data\master"
spshape2dta "${dir}\Data\raw\UIA World Countries Boundaries\World_Countries__Generalized_.shp", replace saving(world_map)
use world_map_shp, clear
//change shapefile to Mercator projection
geo2xy _Y _X, proj (picard) replace
//Add country names and drop Antartica
merge m:1 _ID using world_map 
drop if COUNTRY=="Antarctica" // drop the polar regions
drop _merge
sort _ID
save world_map_shp, replace
//fix countrycodes in world_map file
use world_map, clear
drop if COUNTRY=="Antarctica" // drop the polar regions
//generate countrycodes for merge
rename COUNTRY country
gen country1=country
replace country1=COUNTRYAFF if country1!=COUNTRYAFF
kountry country1, from(other) stuck
rename _ISO3N_ _ISO3N_1
kountry _ISO3N_1, from(iso3n) to(iso3c)
drop _ISO3N_1
rename _ISO3C_ countrycode
*Manually input empty missing codes //NO Macau or Hong Kong or Taiwan
replace countrycode="CIV" if country=="Côte d'Ivoire"
replace countrycode="COD" if country=="Congo DRC"
replace countrycode="CPV" if country=="Cabo Verde"
replace countrycode="MKD" if country=="North Macedonia"
replace countrycode="SWZ" if country=="Eswatini"
replace countrycode="XKX" if country=="Kosovo"
save world_map, replace

////////////// 22. create auxiliary dataset with information on NTL availability

clear
set obs 7
gen min_year=.
gen max_year=.
gen source=""
replace source= "F10" in 1
replace source="F12" in 2
replace source="F14" in 3
replace source="F15" in 4
replace source="F16" in 5
replace source="F18" in 6
replace source="SNPP" in 7
replace min_year=1992 if source=="F10"
replace min_year=1994 if source=="F12"
replace min_year=1997 if source=="F14"
replace min_year=2000 if source=="F15"
replace min_year=2004 if source=="F16"
replace min_year=2010 if source=="F18"
replace min_year=2014 if source=="SNPP"
replace max_year=1994 if source=="F10"
replace max_year=1999 if source=="F12"
replace max_year=2003 if source=="F14"
replace max_year=2007 if source=="F15"
replace max_year=2009 if source=="F16"
replace max_year=2013 if source=="F18"
replace max_year=2018 if source=="SNPP"
gen id=_n
lab define ntl 1 "F10" 2 "F12" 3 "F14" 4 "F15" 5 "F16" 6 "F18" 7 "SNPP"
label values id ntl
drop source
save ntl_sources, replace

timer off 1
timer list 1 